Reproducibility Study: Figure Compilation

Author

Michael C. Saul, michael.saul [at] jax.org

Reproducibility Study: Figure Compilation

Introduction

Motivation

This document compiles figures early 2024 DIVA Reproducibility Study. The data were wrangled in the script labelled 100_data_wrangling.html and analyzed in the script labelled 101_initial_analysis.html.

Analysis

Setup

Libraries

Getting R libraries.

# R code

# Getting essential libraries
library("tidyverse")
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.0     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("janitor")

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library("here")
here() starts at /Users/saulm/Library/CloudStorage/Box-Box/DIVA_JAX_CS/dsw_reproducibility_3site
library("ggokabeito")
library("zoo")

Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric
library("nlme")

Attaching package: 'nlme'

The following object is masked from 'package:dplyr':

    collapse
library("ggpattern")
library("cowplot")

Attaching package: 'cowplot'

The following object is masked from 'package:lubridate':

    stamp
library("Cairo")
library("ggtext")
library("ggnewscale")
library("ggrepel")
library("viridis")
Loading required package: viridisLite

Variables

# R code

pt_to_mm = 0.3527777778
mm_to_pt = 1 / pt_to_mm

envision_colors = c("#23356E","#2D6FB7","#692F90","#9F1D80","#D62770")
library("RColorBrewer")
envision_24_colors = colorRampPalette(envision_colors)(24)

ggplot2 Themes

# R code

pubtheme_classic = theme_classic() +
  theme(axis.title = element_text(size = 8,
                                  color = "#000000"), # text size is in points
        axis.text = element_text(size = 8,
                                 color = "#000000"), # text size is in points
        axis.ticks = element_line(color = "#000000",
                                  linewidth = (0.5 * pt_to_mm), # line size is in mm
                                  lineend = "round"),
        axis.line = element_line(color = "#000000",
                                 linewidth = (0.5 * pt_to_mm), # line size in mm
                                 lineend = "round"),
        legend.text = element_text(size = 6, # text size is in points
                                   color = "#000000"),
        legend.title = element_text(size = 8, # text size is in points
                                    color = "#000000"),
        panel.grid = element_line(color = "#FFFFFF"),
        strip.text = element_text(size = 8, color = "#000000")) # text size is in points

pubtheme_bw = theme_bw() +
  theme(axis.title = element_text(size = 8, # text size in points
                                  color = "#000000",
                                  family = "Helvetica"), # text size in points
        axis.text = element_text(size = 8, # text size in points
                                 color = "#000000",
                                 family = "Helvetica"), # text size in points
        axis.ticks = element_line(color = "#000000",
                                  linewidth = (0.5 * pt_to_mm), # line size in mm
                                  lineend = "round"),
        axis.line = element_line(color = "#000000",
                                 linewidth = (0.5 * pt_to_mm), # line size in mm
                                 lineend = "round"),
        legend.text = element_text(size = 6, # text size in points
                                   color = "#000000",
                                   family = "Helvetica"),
        legend.title = element_text(size = 8, # text size in points
                                    color = "#000000",
                                    family = "Helvetica"),
        panel.grid = element_line(color = "#FFFFFF"),
        strip.text = element_text(size = 8, color = "#000000", family = "Helvetica"))

pubtheme_cowplot = cowplot::theme_cowplot() +
  theme(axis.title = element_text(size = 8, # text size in points
                                  color = "#000000"), # text size in points
        axis.text = element_text(size = 6, # text size in points
                                 color = "#000000"), # text size in points
        axis.ticks = element_line(color = "#000000",
                                  linewidth = (0.25 * pt_to_mm), # line size in mm
                                  lineend = "round"),
        axis.line = element_line(color = "#000000",
                                 linewidth = (0.5 * pt_to_mm), # line size in mm
                                 lineend = "round"),
        legend.text = element_text(size = 6, # text size in points
                                   color = "#000000"),
        legend.title = element_text(size = 8, # text size in points
                                    color = "#000000"),
        panel.grid = element_line(color = "#FFFFFF"),
        strip.text = element_text(size = 8, color = "#000000"))

Data

# R code

load("reproducibility_3site_analysis_data.RData")

Figure 1 Panels

Meters per day plot

# R code

meters_day_plot <- ggplot(data = activity_24h_mean, 
                          aes(x = strain, y = meters_day, color = strain)) + 
  geom_boxplot(outlier.color = NA) + 
  ggbeeswarm::geom_beeswarm(cex = 3, size = 0.5, priority = "random",
                            method = "hex") + 
  pubtheme_bw +
  theme(legend.position = "none",
        panel.grid = element_line(color = "#FFFFFF")) +
  scale_color_okabe_ito(name = "Genotype", order = okabe_order[c(4:9,1:3)]) +
  xlab("Genotype") +
  ylab("Daily Activity (m/day)")

meters_day_plot
Warning: In `position_beeswarm`, method `hex` discretizes the data axis (a.k.a the
continuous or non-grouped axis).
This may result in changes to the position of the points along that axis,
proportional to the value of `cex`.
This warning is displayed once per session.

Rolling Mean Plot

# R code

rollmean_plot <- repro_activity_1hr_rollmean_df |>
  dplyr::mutate(strain_sex = ifelse(sex == "Female", "F", "M"),
                strain_sex = paste0(strain, " ", strain_sex),
                strain_sex = factor(strain_sex,
                                    levels = c("A/J F", "A/J M",
                                               "C57BL/6J F", "C57BL/6J M",
                                               "J:ARC F", "J:ARC M"),
                                    ordered = TRUE)) |>
  ggplot(aes(x = experiment_time_h,
             y = tsd_trend,
             color = site,
             group = site_rep_mouse_sex_cagename)) +
  geom_line(size = 0.25) +
  facet_grid(replicate ~ strain_sex) +
  pubtheme_bw +
  scale_x_continuous(breaks = c(0,240,480)) +
  theme(legend.position = "bottom",
        panel.grid = element_line(color = "#FFFFFF")) +
  scale_color_okabe_ito(name = "Site", order = okabe_order) +
  xlab("Experiment Time (hours)") +
  ylab("Rolling Mean Activity (cm/s)")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
rollmean_plot
Warning: Removed 1275 rows containing missing values or values outside the scale range
(`geom_line()`).

Meters per day ANOVA

# R code

meters_per_day_aov = meters_per_day_aov |>
  dplyr::mutate(factor = as.character(factor),
                factor = gsub("Replicate","Ship",factor),
                factor = gsub("Genetics","Genotype",factor),
                factor = factor(factor, levels = factor, ordered = TRUE))

meters_day_pve_plot <- ggplot(data = meters_per_day_aov,
                              aes(x = study, y = pve, fill = factor)) +
  geom_col(stat = "identity", color = "#444455", size = 0.25) + 
  scale_fill_manual(name = "Factor", values = okabe_order_factors) +
  pubtheme_bw +
  xlab(NULL) +
  ylab("% Variance Explained") +
  theme(legend.position = "right",
        panel.grid = element_line(color = "#FFFFFF")) +
  coord_cartesian(expand = FALSE)
Warning in geom_col(stat = "identity", color = "#444455", size = 0.25):
Ignoring unknown parameters: `size`
meters_day_pve_plot

24-hour percent variance explained

# R code

pve_24hr_plot <- df_pve |>
  dplyr::mutate(factor = as.character(factor),
                factor = gsub("Replicate","Ship",factor),
                factor = gsub("Genetics","Genotype",factor),
                factor = gsub("^Ship:Genotype$","Genotype:Ship",factor),
                factor = gsub("^Site:Genotype$","Genotype:Site",factor),
                factor = factor(factor, levels = levels(meters_per_day_aov$factor), ordered = TRUE)) |>
  ggplot(aes(x = as.numeric(start_time_local), 
             y = percent_variance_explained,
             fill = factor)) +
  geom_bar(position="stack", stat="identity", color = "#444455", width = 1, size = 0.25) +
  scale_x_continuous(breaks = seq(from = 1, to = 24, by = 4), labels = c("04:00","08:00","12:00","16:00","20:00","00:00")) +
  pubtheme_bw +
  scale_fill_manual(name = "Factor", values = okabe_order_factors) +
  xlab("Time of Day") +
  ylab("% Variance Explained") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        panel.grid = element_line(color = "#FFFFFF"),
        legend.position = "bottom") +
  coord_cartesian(expand = FALSE)
Warning in geom_bar(position = "stack", stat = "identity", color = "#444455", :
Ignoring unknown parameters: `size`
pve_24hr_plot

Strain-by-sex 24 hour plot

# R code

strainsex_average_plot = ggplot(data = repro_activity_1hr_hourly_summary, aes(x = as.numeric(start_time_local),
                                                                              y = mean,
                                                                              color = strain)) +
  geom_rect(xmin = 15, xmax = 25, ymin = -Inf, ymax = Inf, 
            alpha = 1, fill = "#DDDDDD", color = NA) +
  geom_ribbon(aes(ymin = ll, ymax = ul, fill = strain),
              alpha = 0.5, color = NA) +
  geom_line() +
  facet_wrap(sex ~ ., nrow = 2) +
  pubtheme_bw +
  scale_x_continuous(breaks = seq(from = 1, to = 24, by = 4), labels = c("04:00","08:00","12:00","16:00","20:00","00:00"), expand = c(0,0)) +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        panel.grid = element_line(color = "#FFFFFF")) +
  xlab("Time of Day") +
  ylab("Mean Activity (cm/s)") +
  scale_color_okabe_ito(name = "Genotype", order = okabe_order[c(4:9,1:3)]) +
  scale_fill_okabe_ito(name = "Genotype", order = okabe_order[c(4:9,1:3)])

strainsex_average_plot

Hourly PVE plot

# R code

pve_hourly_plot <- df_pve |>
  dplyr::mutate(factor = as.character(factor),
                factor = gsub("Genetics","Genotype",factor),
                factor = gsub("Replicate","Ship",factor),
                factor = gsub("^Ship:Genotype$","Genotype:Ship",factor),
                factor = gsub("^Site:Genotype$","Genotype:Site",factor),
                factor = factor(factor, levels = levels(meters_per_day_aov$factor), ordered = TRUE)) |>
  ggplot(aes(x = as.numeric(start_time_local), 
             y = percent_variance_explained, 
             fill = factor)) +
  geom_bar(position="stack", stat="identity", color = "#444455", width = 1) +
  scale_x_continuous(breaks = seq(from = 1, to = 24, by = 4), labels = c("04:00","08:00","12:00","16:00","20:00","00:00")) +
  pubtheme_bw +
  scale_fill_manual(name = "Factor", values = okabe_order_factors) +
  xlab("Time of Day") +
  ylab("% Variance Explained") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        panel.grid = element_line(color = "#FFFFFF"),
        legend.position = "bottom")

pve_hourly_plot

Coefficient of Variation Plot

# R code

repro_activity_1hr_rollmean_cv = repro_activity_1hr_rollmean_df |> 
  dplyr::group_by(study_id, study_name, site, time_zone, replicate, cage_id, cage_name, strain, sex) |> 
  dplyr::summarize(mean_rollmean = mean(tsd_trend, na.rm = TRUE), 
                   sd_rollmean = sd(tsd_trend, na.rm = TRUE)) |> 
  dplyr::ungroup() |> 
  dplyr::mutate(cov_rollmean = sd_rollmean / mean_rollmean)
`summarise()` has grouped output by 'study_id', 'study_name', 'site',
'time_zone', 'replicate', 'cage_id', 'cage_name', 'strain'. You can override
using the `.groups` argument.
cov_rollmean_plot <- ggplot(data = repro_activity_1hr_rollmean_cv, 
                            aes(x = strain, y = cov_rollmean, color = strain)) + 
  geom_boxplot(na.rm = TRUE) + 
  ggbeeswarm::geom_beeswarm(cex = 3, size = 0.5, priority = "random",
                            method = "hex") + 
  pubtheme_bw + 
  theme(legend.position = "none",
        panel.grid = element_line(color = "#FFFFFF")) + 
  scale_color_okabe_ito(name = "Genotype", order = okabe_order[c(4:9,1:3)]) + 
  xlab("Genotype") +
  ylab("CoV of Rolling Mean")

cov_rollmean_plot

Detrended CoV Table

# R code

cov_detrended = repro_activity_1hr_rollmean_df |>
  ungroup() |>
  group_by(site, replicate, cage_name, sex, strain, study_name) |>
  summarize(mean_detrended = mean(tsd_detrended, na.rm = TRUE),
            sd_detrended = sd(tsd_detrended, na.rm = TRUE)) |>
  dplyr::mutate(cov_detrended = sd_detrended / mean_detrended)
`summarise()` has grouped output by 'site', 'replicate', 'cage_name', 'sex',
'strain'. You can override using the `.groups` argument.
as.data.frame(anova(lm(cov_detrended ~ strain + sex + site + replicate + strain:sex + strain:site + strain:replicate,
                       data = cov_detrended))) |>
  dplyr::mutate(pve = 100 * (`Sum Sq` / sum(`Sum Sq`)))

Detrended CoV Plot

# R code

cov_plot = cov_detrended |>
  dplyr::mutate(replicate = gsub("Replicate", "Ship Date", replicate)) |>
  ggplot(aes(x = replicate, y = cov_detrended)) +
  geom_boxplot(aes(color = replicate), 
               outlier.colour = NA) +
  scale_color_manual(values = c("#AAAAAA","#777777","#444444")) +
  ggnewscale::new_scale_color() +
  ggbeeswarm::geom_beeswarm(aes(color = strain), cex = 3, priority = "random",
                            method = "hex", size = 0.5) +
  scale_color_okabe_ito(name = "Genotype", order = okabe_order[c(4:9,1:3)]) +
  xlab(NULL) +
  ylab("CoV of Detrended Data") +
  pubtheme_bw +
  theme(legend.position = "none")

cov_plot

Figure 1 compile

# R code

fig1_row1 = plot_grid(rollmean_plot + theme(legend.position = "none", panel.margin = unit(0, "mm")), 
                      labels = c('A'),
                      label_size = 12, label_fontfamily = "Helvetica", label_fontface = "bold")
Warning in plot_theme(plot): The `panel.margin` theme element is not defined in
the element hierarchy.
Warning: Removed 1275 rows containing missing values or values outside the scale range
(`geom_line()`).
fig1_row2 = plot_grid(cov_plot,
                      meters_day_plot + theme(legend.position = "none") + xlab(NULL), 
                      meters_day_pve_plot + theme(legend.key.height = unit(4,"mm")), 
                      labels = c('B', 'C', 'D'), rel_widths = c(1.125,1.125,1), nrow = 1,
                      label_size = 12, label_fontfamily = "Helvetica", label_fontface = "bold")
fig1_row3 = plot_grid(strainsex_average_plot + theme(legend.position = "none"), 
                      pve_24hr_plot + theme(legend.position = "none"), 
                      labels = c('E', 'F'), rel_widths = c(1, 1),
                      label_size = 12, label_fontfamily = "Helvetica", label_fontface = "bold")

fig1_rows123 = plot_grid(fig1_row1, 
                         fig1_row2, 
                         fig1_row3, 
                         ncol = 1, 
                         rel_heights = c(3,2.25,3))

ggsave("./output/figure1_draft.pdf", fig1_rows123, width = 180, height = 185, units = "mm")

fig1_rows123

Figure 2 Panels

PVE Simulations Plot

# R code

pve_sims = df_pve_rand_summary |>
  dplyr::filter(sample_size %in% c(1,3,10,20)) |>
  dplyr::mutate(sample_size = paste(sample_size, "Days"),
                sample_size = gsub("^1 Days$", "1 Day", sample_size),
                sample_size = factor(paste("Duration:", sample_size),
                                     levels = paste("Duration:", c(1,3,10,20), 
                                                    c("Day","Days","Days","Days")),
                                     ordered = TRUE),
                factor = str_to_title(factor),
                factor = gsub("sex", "Sex", factor),
                factor = gsub("[Ss]train", "Genotype", factor),
                factor = factor(factor,
                                levels = c("Residuals","Genotype","Sex",
                                           "Genotype:Sex","Site","Replicate",
                                           "Site:Genotype","Replicate:Genotype"),
                                ordered = TRUE)) |>
  dplyr::mutate(factor = as.character(factor),
                factor = gsub("Replicate","Ship",factor),
                factor = gsub("^Ship:Genotype$","Genotype:Ship",factor),
                factor = gsub("^Site:Genotype$","Genotype:Site",factor),
                factor = factor(factor, levels = levels(meters_per_day_aov$factor), ordered = TRUE)) |>
  ggplot(aes(x = as.numeric(start_time_local), 
             y = rescaled_median_pve, 
             fill = factor)) +
  geom_bar(position="stack", stat="identity", color = "#444455", width = 1, size = 0.25) +
  scale_x_continuous(breaks = seq(from = 1, to = 24, by = 4), labels = c("04:00","08:00","12:00","16:00","20:00","00:00"), expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0)) +
  facet_wrap(. ~ sample_size, ncol = 2) +
  pubtheme_bw +
  scale_fill_manual(name = NULL, values = okabe_order_factors) +
  xlab("Time of Day") +
  ylab("Median % Variance Explained (Rescaled)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        panel.grid = element_line(color = "#FFFFFF"),
        legend.position = "bottom")
Warning in geom_bar(position = "stack", stat = "identity", color = "#444455", :
Ignoring unknown parameters: `size`
pve_sims

PVE Reproducibility

# R code

pve_repro = df_pve_strain_reprod_summary |>
  dplyr::mutate(n_sig = paste(n_sig, "sig.")) |>
  ggplot(aes(x = sample_size, y = percent, fill = n_sig)) +
  geom_bar(position="stack", stat="identity", color = "#444455", width = 1, size = 0.25) +
  pubtheme_bw +
  scale_x_continuous(breaks = c(5,10,15,20), expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0)) +
  scale_fill_manual(name = NULL, values = c("#FFFFFF","#BBBBBB","#777777","#222222")) +
  theme(legend.position = "bottom",
        panel.grid = element_line(color = "#FFFFFF")) +
  ylab("% Tests") +
  xlab("Duration (days)")
Warning in geom_bar(position = "stack", stat = "identity", color = "#444455", :
Ignoring unknown parameters: `size`
pve_repro

Hourly Reproducibility Plot

# R code

reprod_hour_plot = df_pve_strain_reprod_20days |>
  dplyr::mutate(n_sig = paste(n_sig, "sig.")) |>
  ggplot(aes(x = as.numeric(start_time_local), y = percent, fill = n_sig)) +
  geom_bar(position="stack", stat="identity", color = "#444455", width = 1, size = 0.25) +
  pubtheme_bw +
  scale_x_continuous(breaks = seq(from = 1, to = 24, by = 4), labels = c("04:00","08:00","12:00","16:00","20:00","00:00"), expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0)) +
  scale_fill_manual(name = NULL, values = c("#FFFFFF","#BBBBBB","#777777","#222222")) +
  theme(legend.position = "bottom",
        panel.grid = element_line(color = "#FFFFFF")) +
  ylab("% Tests") +
  xlab("Time of Day")
Warning in geom_bar(position = "stack", stat = "identity", color = "#444455", :
Ignoring unknown parameters: `size`
reprod_hour_plot

PVE Duration and Sample Size Plot

# R code

strain_pve_summary = df_pve_rand_summary |> 
  filter(factor == "strain") |> 
  dplyr::mutate(cohens_d_equiv = median_pve / (100 - median_pve))

strain_pve_summary$n_power = -100
within_var = 1000
power_cubert = 0.8 ^ (1/3)

for (i in seq_len(nrow(strain_pve_summary))) {
  r2_i = dplyr::pull(strain_pve_summary,"rescaled_median_pve")[i] / 100
  total_var_i = within_var / (1 - r2_i)
  between_var_i = r2_i * total_var_i
  anova_n_i = power.anova.test(groups = 3,
                               between.var = between_var_i,
                               within.var = within_var,
                               power = power_cubert, 
                               sig.level = 0.05)
  strain_pve_summary[i,"n_power"] = ceiling(anova_n_i$n)
}

strain_pve_summary$label_n = ifelse(strain_pve_summary$sample_size %in% c(1,20),
                                    paste0('n = ', strain_pve_summary$n_power),
                                    NA)

sample_size_range = range(strain_pve_summary$n_power)
rescaled_pve_range = range(strain_pve_summary$rescaled_median_pve)
scale_factor = (sample_size_range[2]) / (rescaled_pve_range[2] - rescaled_pve_range[1])

pve_sample_size_plot = strain_pve_summary |>
  dplyr::mutate(has_label = ifelse(is.na(label_n),
                                   NA, "1")) |>
  ggplot(aes(x = sample_size, 
             y = rescaled_median_pve, 
             color = start_time_local)) + 
  geom_point(size = 0.5, shape = 3) +
  geom_step(aes(y = (n_power + 7) / scale_factor), direction = "hv") +
  # geom_label_repel(aes(y = (n_power + 7) / scale_factor, label = label_n), size = 6 * pt_to_mm) +
  # geom_point(aes(y = (n_power + 7) / scale_factor, shape = has_label), size = 0.75) +
  pubtheme_bw +
  scale_y_continuous(
    name = "% Variance Explained", limits = rescaled_pve_range,
    sec.axis = sec_axis(~ (. * scale_factor) - 7, name = "Replicable Sample Size (n)")
  ) +
  xlab("Study Duration (days)") +
  scale_color_manual(values = envision_24_colors) +
  ylab("% Variance Explained") +
  facet_wrap(. ~ start_time_local, nrow = 4) + 
  theme(panel.grid = element_line(color = "#FFFFFF"), 
        legend.position = "none",
        panel.margin = unit(1, "mm"))

pve_sample_size_plot
Warning in plot_theme(plot): The `panel.margin` theme element is not defined in
the element hierarchy.

Collapsed PVE Plot

# R code

pve_summary_collapsed = df_pve_rand |> 
  dplyr::group_by(factor, sample_size) |>
  dplyr::summarize(median_pve = median(percent_variance_explained, na.rm = TRUE)) |>
  dplyr::ungroup() |>
  dplyr::group_by(sample_size) |>
  dplyr::mutate(rescaled_median_pve = 100 * median_pve / sum(median_pve))
`summarise()` has grouped output by 'factor'. You can override using the
`.groups` argument.
strain_pve_summary_collapsed = pve_summary_collapsed |>
  dplyr::filter(factor == "strain") |> 
  dplyr::mutate(cohens_d_equiv = rescaled_median_pve / (100 - rescaled_median_pve))

strain_pve_summary_collapsed$n_power = -100
within_var = 1000
power_cubert = 0.8 ^ (1/3)

for (i in seq_len(nrow(strain_pve_summary_collapsed))) {
  r2_i = dplyr::pull(strain_pve_summary_collapsed,"rescaled_median_pve")[i] / 100
  total_var_i = within_var / (1 - r2_i)
  between_var_i = r2_i * total_var_i
  anova_n_i = power.anova.test(groups = 3,
                               between.var = between_var_i,
                               within.var = within_var,
                               power = power_cubert, 
                               sig.level = 0.05)
  strain_pve_summary_collapsed[i,"n_power"] = ceiling(anova_n_i$n)
}

strain_pve_summary_collapsed$label_n = ifelse(strain_pve_summary_collapsed$sample_size %in% c(1,20),
                                              paste0('n = ', strain_pve_summary_collapsed$n_power),
                                              NA)

sample_size_range = range(strain_pve_summary_collapsed$n_power)
rescaled_pve_range = range(strain_pve_summary_collapsed$rescaled_median_pve)
scale_factor = (sample_size_range[2]) / (rescaled_pve_range[2] - rescaled_pve_range[1])
offset = rescaled_pve_range[1] * scale_factor 

pve_sample_size_plot_collapsed = strain_pve_summary_collapsed |>
  dplyr::mutate(has_label = ifelse(is.na(label_n),
                                   NA, "1")) |>
  ggplot(aes(x = sample_size, 
             y = rescaled_median_pve)) + 
  geom_step(aes(y = (n_power + offset) / scale_factor - 5), direction = "hv", color = "#008FC0", size = 1) +
  geom_point(size = 3, shape = 3, color = "#ED8B00", stroke = 1) +
  # geom_label_repel(aes(y = (n_power + 7) / scale_factor, label = label_n), size = 6 * pt_to_mm) +
  # geom_point(aes(y = (n_power + 7) / scale_factor, shape = has_label), size = 0.75) +
  pubtheme_bw +
  scale_y_continuous(
    name = "Variance Explained (orange + signs)", limits = rescaled_pve_range,
    sec.axis = sec_axis(~ (. * scale_factor) - offset + 5, name = "Replicable Sample Size (blue stair step)")
  ) +
  xlab("Study Duration (days)") +
  theme(panel.grid = element_line(color = "#FFFFFF"), 
        legend.position = "none",
        panel.margin = unit(1, "mm"))

pve_sample_size_plot_collapsed
Warning in plot_theme(plot): The `panel.margin` theme element is not defined in
the element hierarchy.

Compile Figure 2

# R code

fig2_col1 = plot_grid(pve_sims + theme(legend.position = "bottom", 
                                       panel.margin = unit(1, "mm"),
                                       legend.key.height = unit(3,"mm"),
                                       legend.key.width = unit(3,"mm"),
                                       legend.margin = margin(t = -5)), 
                      labels = c('A','B'),
                      label_size = 12, label_fontfamily = "Helvetica", label_fontface = "bold",
                      rel_widths = c(2,1), ncol = 1)
Warning in plot_theme(plot): The `panel.margin` theme element is not defined in
the element hierarchy.
fig2_col2 = plot_grid(pve_repro + theme(legend.position = "none"),
                      reprod_hour_plot + theme(legend.position = "bottom", 
                                               legend.key.height = unit(3,"mm"),
                                               legend.key.width = unit(3,"mm"),
                                               legend.margin = margin(t = -5)),
                      labels = c('B','C'),
                      label_size = 12, label_fontfamily = "Helvetica", label_fontface = "bold",
                      rel_heights = c(1,1.1), ncol = 1)
fig2_row2 = plot_grid(pve_sample_size_plot,
                      ncol = 1, labels = c('D'),
                      label_size = 12, label_fontfamily = "Helvetica", label_fontface = "bold")
Warning in plot_theme(plot): The `panel.margin` theme element is not defined in
the element hierarchy.
Warning: Removed 8 rows containing missing values or values outside the scale range
(`geom_step()`).
fig2_cols12 = plot_grid(fig2_col1, fig2_col2, nrow = 1, rel_widths = c(1.5,1))
fig2_rows12 = plot_grid(fig2_cols12, fig2_row2, nrow = 2, rel_heights = c(1,1))

ggsave("./output/figure2_draft.pdf", fig2_rows12, width = 180, height = 185, units = "mm")

fig2_rows12

Reproducibility Information

Reporting out R session information.

# R code

sessionInfo()
R version 4.5.0 (2025-04-11)
Platform: aarch64-apple-darwin20
Running under: macOS Sequoia 15.7

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/Los_Angeles
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] RColorBrewer_1.1-3 viridis_0.6.5      viridisLite_0.4.2  ggrepel_0.9.6     
 [5] ggnewscale_0.5.2   ggtext_0.1.2       Cairo_1.6-2        cowplot_1.1.3     
 [9] ggpattern_1.1.4    nlme_3.1-168       zoo_1.8-14         ggokabeito_0.1.0  
[13] here_1.0.1         janitor_2.2.1      lubridate_1.9.4    forcats_1.0.0     
[17] stringr_1.5.1      dplyr_1.1.4        purrr_1.0.4        readr_2.1.5       
[21] tidyr_1.3.1        tibble_3.2.1       ggplot2_4.0.0      tidyverse_2.0.0   

loaded via a namespace (and not attached):
 [1] generics_0.1.3    xml2_1.3.8        stringi_1.8.7     lattice_0.22-6   
 [5] hms_1.1.3         digest_0.6.37     magrittr_2.0.3    evaluate_1.0.3   
 [9] grid_4.5.0        timechange_0.3.0  fastmap_1.2.0     rprojroot_2.0.4  
[13] jsonlite_2.0.0    gridExtra_2.3     scales_1.4.0      textshaping_1.0.0
[17] cli_3.6.4         rlang_1.1.6       withr_3.0.2       yaml_2.3.10      
[21] ggbeeswarm_0.7.2  tools_4.5.0       tzdb_0.5.0        vctrs_0.6.5      
[25] R6_2.6.1          lifecycle_1.0.4   snakecase_0.11.1  htmlwidgets_1.6.4
[29] vipor_0.4.7       ragg_1.4.0        beeswarm_0.4.0    pkgconfig_2.0.3  
[33] pillar_1.10.2     gtable_0.3.6      Rcpp_1.0.14       glue_1.8.0       
[37] systemfonts_1.2.2 xfun_0.52         tidyselect_1.2.1  rstudioapi_0.17.1
[41] knitr_1.50        farver_2.1.2      htmltools_0.5.8.1 labeling_0.4.3   
[45] rmarkdown_2.29    compiler_4.5.0    S7_0.2.0          gridtext_0.1.5